<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-us" lang="en-us">
<head>
  <!-- Character encoding is declared first so the parser sees it before any text content. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta name="generator" content="Hugo 0.68.3">
  <link rel="profile" href="https://gmpg.org/xfn/11">

  <title>爬虫爬取百度图片 &middot; 我的博客</title>

  <!-- Stylesheets: the print-only sheet first, then the theme's screen styles and the web fonts. -->
  <link rel="stylesheet" href="/my_technology_blog/css/print.css" media="print">
  <link rel="stylesheet" href="/my_technology_blog/css/poole.css">
  <link rel="stylesheet" href="/my_technology_blog/css/syntax.css">
  <link rel="stylesheet" href="/my_technology_blog/css/hyde.css">
  <!-- The "|" separating the two font families is percent-encoded (%7C) to keep the URL valid. -->
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Abril+Fatface%7CPT+Sans:400,400i,700">

  <!-- Icon URLs use the same /my_technology_blog/ base path as the stylesheets above. -->
  <link rel="apple-touch-icon-precomposed" sizes="144x144" href="/my_technology_blog/apple-touch-icon-144-precomposed.png">
  <link rel="shortcut icon" href="/my_technology_blog/favicon.png">
</head>

  <body class="theme-base-0b ">
  <aside class="sidebar">
    <!-- Sidebar: site title and tagline, the navigation list, and the copyright line. -->
    <div class="container sidebar-sticky">
      <div class="sidebar-about">
        <a href="/my_technology_blog/"><h1>我的博客</h1></a>
        <p class="lead">杨博的博客</p>
      </div>

      <nav>
        <ul class="sidebar-nav">
          <li><a href="/my_technology_blog/">Home</a></li>
        </ul>
      </nav>

      <p>&copy; 2021. All rights reserved.</p>
    </div>
  </aside>

    <main class="content container">
    <div class="post">
  <!-- Post title and machine-readable publication timestamp (UTC+8). -->
  <h1>爬虫爬取百度图片</h1>
  <time class="post-date" datetime="2020-06-21T17:20:34+0800">Sun, Jun 21, 2020</time>
  <!-- Syntax-highlighted Python listing. Whitespace inside <pre> is significant, so the lines below are deliberately not re-indented. The listed script names each downloaded file after the loop counter (0.jpg, 1.jpg, ...). -->
  <div class="highlight"><pre style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4"><code class="language-python" data-lang="python"><span style="color:#f92672">from</span> urllib.request <span style="color:#f92672">import</span> urlopen, urlretrieve
<span style="color:#f92672">import</span> requests

<span style="color:#f92672">import</span> re

<span style="color:#75715e"># url为要爬取图片的网址</span>
url <span style="color:#f92672">=</span> <span style="color:#e6db74">&#34;http://image.baidu.com/search/index?tn=baiduimage&amp;ipn=r&amp;ct=201326592&amp;cl=2&amp;lm=-1&amp;st=-1&amp;fm=result&amp;fr=&amp;sf=1&amp;fmq=1580824135824_R&amp;pv=&amp;ic=0&amp;nc=1&amp;z=&amp;hd=&amp;latest=&amp;copyright=&amp;se=1&amp;showtab=0&amp;fb=0&amp;width=&amp;height=&amp;face=0&amp;istype=2&amp;ie=utf-8&amp;sid=&amp;word=</span><span style="color:#e6db74">%E</span><span style="color:#e6db74">9</span><span style="color:#e6db74">%82%</span><span style="color:#e6db74">B5</span><span style="color:#e6db74">%E</span><span style="color:#e6db74">5%B2%B1</span><span style="color:#e6db74">%E</span><span style="color:#e6db74">5</span><span style="color:#e6db74">%84%</span><span style="color:#e6db74">BF&#34;</span>


html <span style="color:#f92672">=</span> urlopen(url)

html_ym <span style="color:#f92672">=</span> html<span style="color:#f92672">.</span>read()<span style="color:#f92672">.</span>decode()

urls <span style="color:#f92672">=</span> re<span style="color:#f92672">.</span>findall(<span style="color:#e6db74">r</span><span style="color:#e6db74">&#39;&#34;objURL&#34;:&#34;(.*?)&#34;&#39;</span>,html_ym)

index <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span>

<span style="color:#66d9ef">for</span> url <span style="color:#f92672">in</span> urls:
    <span style="color:#66d9ef">if</span> index <span style="color:#f92672">&lt;=</span> <span style="color:#ae81ff">5</span>:
        res <span style="color:#f92672">=</span> requests<span style="color:#f92672">.</span>get(url<span style="color:#f92672">=</span>url)
        <span style="color:#66d9ef">with</span> open(str(index)<span style="color:#f92672">+</span><span style="color:#e6db74">&#34;.jpg&#34;</span>, <span style="color:#e6db74">&#39;wb&#39;</span>) <span style="color:#66d9ef">as</span> f:
            f<span style="color:#f92672">.</span>write(res<span style="color:#f92672">.</span>content)
            <span style="color:#66d9ef">print</span>(index,<span style="color:#e6db74">&#34;已下载&#34;</span>)
            index <span style="color:#f92672">+=</span> <span style="color:#ae81ff">1</span>
    <span style="color:#66d9ef">else</span>:
        <span style="color:#66d9ef">break</span>
</code></pre></div>
</div>


    </main>

    
  </body>
</html>
